import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import explained_variance_score # dispersion(spreading) of error in dataset
from sklearn.metrics import confusion_matrix
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
dataset=pd.read_csv("flatrentwithprice.csv")
dataset.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
dataset=dataset.drop(['age','distance','floor','facing','flat_rent'],axis=1)
dataset.head()
| size | flat_price | |
|---|---|---|
| 0 | 870 | 2300000 |
| 1 | 900 | 3000000 |
| 2 | 1050 | 5000000 |
| 3 | 950 | 1700000 |
| 4 | 750 | 2700000 |
# X(Independent variables) and y(target variables)
X = dataset.iloc[:,0:].values
y = dataset.iloc[:,1].values
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=42) # test size means size of the set test.
# Decision Tree Classifier
tr_regressor=DecisionTreeClassifier(random_state=42)
tr_regressor.fit(X_train,y_train)
tr_regressor.score(X_test,y_test)
pred_tr=tr_regressor.predict(X_test)
decision_score=tr_regressor.score(X_test,y_test)
expl_tr=explained_variance_score(pred_tr,y_test)
# Random Forest Regression Model
rf_regressor=RandomForestClassifier(random_state=42)
rf_regressor.fit(X_train,y_train)
rf_regressor.score(X_test,y_test)
pred_rf=rf_regressor.predict(X_test)
rf_score=rf_regressor.score(X_test,y_test)
expl_rf=explained_variance_score(pred_rf,y_test)
print("The Score of the Decision Tree Classifier:",round(tr_regressor.score(X_test,y_test)*100))
print("The score of the Random Forest Classifier:",round(rf_regressor.score(X_test,y_test)*100))
The Score of the Decision Tree Classifier: 72 The score of the Random Forest Classifier: 35
# Confusion Matrix for Decision Tree Classifier.
conf_mat=confusion_matrix(y_test,pred_tr)
print(conf_mat)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]]
# Confusion Matrix for Random Forest Classifier.
conf_mat=confusion_matrix(y_test,pred_rf)
print(conf_mat)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 2 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]]
# R2 score ,Mean Square Error , Mean Absolute Error,f1 score,Recall Score,Precision Score in Decision Tree Classifier.
mse=mean_squared_error(y_test,pred_tr)
mae=mean_absolute_error(y_test,pred_tr)
score=r2_score(y_test,pred_tr)
f1score=f1_score(y_test,pred_tr,average='micro')#Calculate metrics globally by counting the total true positives, false negatives and false positives.
f1sc=f1_score(y_test,pred_tr,average='macro') #Calculate metrics for each label, and find their unweighted mean.
#This does not take label imbalance into account.
recall=recall_score(y_test, pred_tr, average='macro')
#Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
re=recall_score(y_test, pred_tr, average='micro') #Calculate metrics globally by counting the total true positives, false negatives and false positive
rmse=np.sqrt(mse)
preci=precision_score(y_test, pred_tr, average='macro')
pr=precision_score(y_test, pred_tr, average='micro')
print('the mse is ',mse)
print('mae is ',mae)
print('score is ',score)
print('rmse',rmse)
print("The f1 score in micro:",f1score)
print("The F1 score in macro: ",f1sc)
print("The recall score in macro: ",recall)
print("The recall score in micro:",re)
print("The precision score in macro:",preci)
print("The precision score in micro",pr)
the mse is 13250000000.0 mae is 52500.0 score is 0.9923602227067154 rmse 115108.64433221339 The f1 score in micro: 0.7250000000000001 The F1 score in macro: 0.5785714285714285 The recall score in macro: 0.6071428571428571 The recall score in micro: 0.725 The precision score in macro: 0.5714285714285714 The precision score in micro 0.725
# R2 score ,Mean Square Error , Mean Absolute Error,Fi score , Recall Score in Random Forest Classifier .
mse=mean_squared_error(y_test,pred_rf)
mae=mean_absolute_error(y_test,pred_rf)
score=r2_score(y_test,pred_rf)
rmse=np.sqrt(mse)
f1score=f1_score(y_test,pred_rf,average='micro')
f1sc=f1_score(y_test,pred_rf,average='macro')
recall=recall_score(y_test, pred_rf, average='macro')
re=recall_score(y_test, pred_rf, average='micro')
preci=precision_score(y_test, pred_rf, average='macro')
pr=precision_score(y_test, pred_rf, average='micro')
print('the mse is ',mse)
print('mae is ',mae)
print('score is ',score)
print('rmse',rmse)
print("The f1 score is in micro:",f1score)
print("The F1 score is in macro: ",f1sc)
print("The recall score in macro: ",recall)
print("The recall score in micro:",re)
print("The precision score in macro:",preci)
print("The precision score in micro",pr)
the mse is 368500000000.0 mae is 310000.0 score is 0.7875277032018595 rmse 607042.0084310476 The f1 score is in micro: 0.35 The F1 score is in macro: 0.24666666666666667 The recall score in macro: 0.27777777777777773 The recall score in micro: 0.35 The precision score in macro: 0.27888888888888896 The precision score in micro 0.35
tab_formate=pd.DataFrame({'Models':["Decision Tree Classifier","Random Forest Classifier"],
'Score':[ decision_score, rf_score],"Explained_Variance_Score":[ expl_tr, expl_rf]})
tab_formate.sort_values(by="Score",ascending=False)
| Models | Score | Explained_Variance_Score | |
|---|---|---|---|
| 0 | Decision Tree Classifier | 0.725 | 0.991977 |
| 1 | Random Forest Classifier | 0.350 | 0.777980 |
import matplotlib.pyplot as plt
import csv
x = []
y = []
with open('flatrentwithprice.csv','r') as fl:
plots = csv.reader(fl, delimiter = ',')
for row in plots:
x.append(row[0])
y.append(row[5])
plt.bar(x, y, color = 'maroon', width = 0.6, label = "Flat_Price")
plt.xticks(rotation=55)
plt.xlabel("Size")
plt.ylabel('Flat_Price')
plt.title('Size Vs Flat_Price')
plt.legend()
plt.show()
#scatter_plotting.py
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('seaborn') # to get seaborn scatter plot
# read the csv file to extract data
data = pd.read_csv('flatrentwithprice.csv')
size = data['size']
flat_price = data['flat_price']
plt.scatter(size, flat_price, s=100, alpha=0.6, edgecolor='black', linewidth=1)
plt.title('Size vs Flat_Price')
plt.xlabel('Flat_price')
plt.ylabel('Size(square-foot)')
plt.tight_layout()
plt.show()
data.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv("flatrentwithprice.csv")
fig = plt.figure()
plt.plot(data['size'],data['flat_price'])
fig.autofmt_xdate()
plt.show()
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import matplotlib.pyplot as plt
dataset=pd.read_csv("flatrentwithprice.csv")
dataset.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
dataset=dataset.drop(['size','floor','age','facing','flat_rent'],axis=1)
dataset.head()
| distance | flat_price | |
|---|---|---|
| 0 | 1.5 | 2300000 |
| 1 | 2.4 | 3000000 |
| 2 | 2.0 | 5000000 |
| 3 | 3.0 | 1700000 |
| 4 | 2.0 | 2700000 |
X = dataset.iloc[:,0:].values
y=dataset.iloc[:,1].values
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)
# Decision Tree Classifier
tr_regressor=DecisionTreeClassifier(random_state=42)
tr_regressor.fit(X_train,y_train)
tr_regressor.score(X_test,y_test)
pred_tr=tr_regressor.predict(X_test)
decision_score=tr_regressor.score(X_test,y_test)
expl_tr=explained_variance_score(pred_tr,y_test)
# Random Forest Classifier
rf_regressor=RandomForestClassifier(random_state=42)
rf_regressor.fit(X_train,y_train)
rf_regressor.score(X_test,y_test)
pred_rf=rf_regressor.predict(X_test)
rf_score=rf_regressor.score(X_test,y_test)
expl_rf=explained_variance_score(pred_rf,y_test)
print("The Score of the Decision Tree Classifier between distance and flat_price:",round(tr_regressor.score(X_test,y_test)*100))
print("The score of the Random Forest Classifier between distance and flat_price:",round(rf_regressor.score(X_test,y_test)*100))
The Score of the Decision Tree Classifier between distance and flat_price: 63 The score of the Random Forest Classifier between distance and flat_price: 30
tab_formate=pd.DataFrame({'Models':["Decision Tree Classifier","Random Forest Classifier"],
'Score':[ decision_score, rf_score],"Explained_Variance_Score":[ expl_tr, expl_rf]})
tab_formate.sort_values(by="Score",ascending=False)
| Models | Score | Explained_Variance_Score | |
|---|---|---|---|
| 0 | Decision Tree Classifier | 0.633333 | 0.992595 |
| 1 | Random Forest Classifier | 0.300000 | 0.967750 |
dataset=pd.read_csv("flatrentwithprice.csv")
x=dataset[["size"]]
y=dataset[["flat_price"]]
data=pd.read_csv('flatrentwithprice.csv')
data.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
import matplotlib.pyplot as plt
import csv
x = []
y = []
with open('flatrentwithprice.csv','r') as fl:
plots = csv.reader(fl, delimiter = ',')
for row in plots:
x.append(row[1])
y.append(row[5])
plt.bar(x, y, color = 'blue', width = 0.4, label = "Flat_Price")
plt.xticks(rotation=55)
plt.xlabel("Distance")
plt.ylabel('Flat_Price')
plt.title('Distance Vs Flat_Price')
plt.legend()
plt.show()
#scatter_plotting.py
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('seaborn') # to get seaborn scatter plot
# read the csv file to extract data
data = pd.read_csv('flatrentwithprice.csv')
distance = data['distance']
flat_price = data['flat_price']
plt.scatter(flat_price, distance, s=100, alpha=0.6, edgecolor='black', linewidth=1)
plt.title('Distance vs Flat_Price')
plt.xlabel('Flat_price')
plt.ylabel('Distance')
plt.tight_layout()
plt.show()
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
fd=pd.read_csv("flatrentwithprice.csv")
X = pd.DataFrame(sc_X.fit_transform(fd.drop(["flat_rent"],axis = 1)),
columns=['size', 'distance', 'floor', 'age', 'facing', 'flat_price'])
y=fd['flat_rent']
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,y,test_size=.2,random_state=42)
corr=X_train.corr()
sns.heatmap(corr,annot=True,cmap='Blues')
<AxesSubplot:>
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
data = pd.read_csv("flatrentwithprice.csv")
print(data.head())
size distance floor age facing flat_price flat_rent 0 870 1.5 1 2 1 2300000 9000 1 900 2.4 2 5 1 3000000 12000 2 1050 2.0 2 10 2 5000000 15000 3 950 3.0 3 20 3 1700000 7000 4 750 2.0 1 7 4 2700000 10000
print(data.describe())
size distance floor age facing \
count 100.000000 100.000000 100.000000 100.000000 100.00000
mean 825.120000 2.669000 4.270000 12.270000 2.44000
std 184.569457 1.347553 2.662326 6.731285 1.16619
min 500.000000 1.000000 1.000000 1.000000 1.00000
25% 680.000000 2.000000 2.000000 7.750000 1.00000
50% 840.000000 2.000000 4.000000 12.000000 3.00000
75% 950.000000 4.000000 6.000000 16.250000 3.00000
max 1250.000000 6.000000 12.000000 30.000000 4.00000
flat_price flat_rent
count 1.000000e+02 100.000000
mean 3.015000e+06 9018.000000
std 1.231520e+06 2776.720688
min 1.200000e+06 4000.000000
25% 2.100000e+06 7000.000000
50% 2.650000e+06 8000.000000
75% 3.800000e+06 10000.000000
max 5.900000e+06 18000.000000
print("This is for flat_Price \n")
print(f"Mean flat_price: {data.flat_price.mean()}")
print(f"Median flat_price: {data.flat_price.median()}")
print(f"Highest flat_price: {data.flat_price.max()}")
print(f"Lowest flat_price: {data.flat_price.min()}")
# to finding the flat_rent
print("\n This is for Flat_Rent\n")
print(f"Mean flat_rent: {data.flat_rent.mean()}")
print(f"Median flat_rent: {data.flat_rent.median()}")
print(f"Highest flat_rent: {data.flat_rent.max()}")
print(f"Lowest flat_rent: {data.flat_rent.min()}")
data.head()
This is for flat_Price Mean flat_price: 3015000.0 Median flat_price: 2650000.0 Highest flat_price: 5900000 Lowest flat_price: 1200000 This is for Flat_Rent Mean flat_rent: 9018.0 Median flat_rent: 8000.0 Highest flat_rent: 18000 Lowest flat_rent: 4000
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
figure = px.bar(data, x=data["flat_price"],
y = data["size"],
color = data["facing"],
title="Size , flat_price and Facing")
figure.show()
figure = px.bar(data, x=data["flat_price"],
y = data["size"],
color = data["floor"],
title="Size , Flat_price, Floor")
figure.show()
data.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
figure = px.bar(data, x=data["flat_rent"],
y = data["size"],
color = data["facing"],
title="Size ,Flat_rent and Facing")
figure.show()
data.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
# Now we are going to splitting the data.
from sklearn.model_selection import train_test_split
x=np.array(data[["size","distance","floor","age","facing"]])
y=np.array(data[["flat_price"]])
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.4,random_state=42)
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
model.add(LSTM(128, return_sequences=True,
input_shape= (xtrain.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 5, 128) 66560
lstm_1 (LSTM) (None, 64) 49408
dense (Dense) (None, 25) 1625
dense_1 (Dense) (None, 1) 26
=================================================================
Total params: 117,619
Trainable params: 117,619
Non-trainable params: 0
_________________________________________________________________
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(xtrain, ytrain, batch_size=1, epochs=21)
Epoch 1/21 60/60 [==============================] - 4s 5ms/step - loss: 10645548302336.0000 Epoch 2/21 60/60 [==============================] - 0s 5ms/step - loss: 10645253652480.0000 Epoch 3/21 60/60 [==============================] - 0s 4ms/step - loss: 10644838416384.0000 Epoch 4/21 60/60 [==============================] - 0s 5ms/step - loss: 10644275331072.0000 Epoch 5/21 60/60 [==============================] - 0s 5ms/step - loss: 10643569639424.0000 Epoch 6/21 60/60 [==============================] - 0s 5ms/step - loss: 10642715049984.0000 Epoch 7/21 60/60 [==============================] - 0s 5ms/step - loss: 10641697931264.0000 Epoch 8/21 60/60 [==============================] - 0s 5ms/step - loss: 10640523526144.0000 Epoch 9/21 60/60 [==============================] - 0s 5ms/step - loss: 10639208611840.0000 Epoch 10/21 60/60 [==============================] - 0s 5ms/step - loss: 10637732216832.0000 Epoch 11/21 60/60 [==============================] - 0s 5ms/step - loss: 10636113215488.0000 Epoch 12/21 60/60 [==============================] - 0s 5ms/step - loss: 10634363142144.0000 Epoch 13/21 60/60 [==============================] - 0s 5ms/step - loss: 10632465219584.0000 Epoch 14/21 60/60 [==============================] - 0s 5ms/step - loss: 10630419447808.0000 Epoch 15/21 60/60 [==============================] - 0s 5ms/step - loss: 10628263575552.0000 Epoch 16/21 60/60 [==============================] - 0s 5ms/step - loss: 10625954611200.0000 Epoch 17/21 60/60 [==============================] - 0s 5ms/step - loss: 10623520866304.0000 Epoch 18/21 60/60 [==============================] - 0s 5ms/step - loss: 10620962340864.0000 Epoch 19/21 60/60 [==============================] - 0s 5ms/step - loss: 10618265403392.0000 Epoch 20/21 60/60 [==============================] - 0s 5ms/step - loss: 10615463608320.0000 Epoch 21/21 60/60 [==============================] - 0s 5ms/step - loss: 10612530741248.0000
<keras.callbacks.History at 0x15731a48cd0>
data.head()
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
print("Enter House Details to Predict Flat_Price")
b = int(input("Size of the House: "))
c = float(input("Distance: "))
d = int(input("Number of Floors: "))
e = int(input("Age of the buildings: "))
f = int(input("Facing of the house: "))
features = np.array([[ b, c, d, e, f]])
print("Predicted House Price = ", model.predict(features))
Enter House Details to Predict Flat_Price Size of the House: 680 Distance: 1.5 Number of Floors: 3 Age of the buildings: 0 Facing of the house: 4 1/1 [==============================] - 0s 28ms/step Predicted House Price = [[5698.6167]]
from sklearn import svm, datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
data=pd.read_csv("flatrentwithprice.csv")
x=data[["size","distance","floor","age","facing"]]
y=data[["flat_price"]]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.4, random_state=42)
clf = LogisticRegression(penalty='l2', C=0.1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
Accuracy 0.025
x=data[["size"]]
y=data[["flat_price"]]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.4, random_state=44)
clf = LogisticRegression(penalty='l2', C=0.1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
data.head()
Accuracy 0.025
| size | distance | floor | age | facing | flat_price | flat_rent | |
|---|---|---|---|---|---|---|---|
| 0 | 870 | 1.5 | 1 | 2 | 1 | 2300000 | 9000 |
| 1 | 900 | 2.4 | 2 | 5 | 1 | 3000000 | 12000 |
| 2 | 1050 | 2.0 | 2 | 10 | 2 | 5000000 | 15000 |
| 3 | 950 | 3.0 | 3 | 20 | 3 | 1700000 | 7000 |
| 4 | 750 | 2.0 | 1 | 7 | 4 | 2700000 | 10000 |
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('flatrentwithprice.csv')
df[["size","flat_price"]].plot(
xlabel='size',
ylabel='flat_price',
title='Size vs Flat_price'
)
plt.show()